{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas\n",
    "from sklearn import model_selection\n",
    "from sklearn.preprocessing import LabelEncoder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pandas.read_csv('../data/iris.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=data.drop([\"Unnamed: 0\"],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset=data.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = dataset[:,0:4]\n",
    "Y = dataset[:,4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "label_encoder = LabelEncoder()\n",
    "label_encoder = label_encoder.fit(Y)\n",
    "label_encoded_y = label_encoder.transform(Y)\n",
    "data[\"label\"]=label_encoded_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=data.drop([\"Species\"],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sepal.Length\n",
      "Sepal.Width\n",
      "Petal.Length\n",
      "Petal.Width\n"
     ]
    }
   ],
   "source": [
    "for x in data:\n",
    "    if x !=\"label\":\n",
    "        data[x] = (data[x] - data[x].min()) / (data[x].max() - data[x].min())\n",
    "        print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_y=data[\"label\"]\n",
    "train_x=data[[\"Sepal.Length\",\"Sepal.Width\",\"Petal.Length\",\"Petal.Width\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed = 7\n",
    "test_size = 0.33\n",
    "X_train, X_test, y_train, y_test = model_selection.train_test_split(train_x, train_y, test_size=test_size, random_state=seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data=X_train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_data=X_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data.to_csv(\"../data/iris_train_data.csv\")\n",
    "test_data.to_csv(\"../data/iris_test_data.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_test.to_csv(\"../data/iris_test_data_label.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train.to_csv(\"../data/iris_train_data_label.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
